import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fbprophet import Prophet
%matplotlib inline
# Load the avocado data set from the CSV file in the working directory and
# take a first look at its rows and column summary.
df = pd.read_csv('avocado.csv')
df.head(20)
df.info()

# Give the numerically named columns readable labels. The rename is applied
# to the original dataframe in place. NOTE: index=str also converts every
# index label to a string.
readable_columns = {
    "4046": "Small Hass",
    "4225": "Large Hass",
    "4770": "Extra-Large Hass",
}
df.rename(index=str, columns=readable_columns, inplace=True)

# Remove the 'Unnamed: 0' column.
df.drop(columns=['Unnamed: 0'], inplace=True)

# Parse the 'Date' column into pandas datetime values.
df['Date'] = pd.to_datetime(df['Date'])
df.head()
Analyzing the categorical data to see the full range of values in this data set.
# Row count per region, followed by the number of distinct regions.
df.region.value_counts()
df.region.nunique()
# Row count per year and per avocado type.
df.year.value_counts()
df.type.value_counts()
Here we see that there are two different types of avocados: conventional and organic. Let's separate these two types from each other.
# Split the data into one frame per avocado type using boolean masks on the
# 'type' column.
is_conventional = df['type'] == 'conventional'
is_organic = df['type'] == 'organic'
conventional_frame = df[is_conventional]
organic_frame = df[is_organic]

organic_frame.info()
# Mean of the AveragePrice column for each type.
organic_frame.AveragePrice.mean()
conventional_frame.AveragePrice.mean()
# AveragePrice next to the 'Small Hass' column for the organic rows.
organic_frame[['AveragePrice', 'Small Hass']]
Finding how many regions are recorded for each type of avocado, as well as how many entries there are per region.
# Group the organic rows by region, then report the number of regions and the
# number of entries recorded for each one.
regions_organic = organic_frame.groupby('region')
print("Total regions for Organic avocado:", len(regions_organic))
print("-------------")
for name, group in regions_organic:
    # The loop body must be indented; without it the cell does not parse.
    print(name, " : ", len(group))

# Same report for the conventional rows.
regions_conventional = conventional_frame.groupby('region')
print("Total regions for Conventional avocado:", len(regions_conventional))
print("-------------")
for name, group in regions_conventional:
    print(name, " : ", len(group))
Now we can make predictions for a specific region chosen from either the conventional or the organic avocados. Let's start with organic and choose the "TotalUS" region.
# Take the organic "TotalUS" group, keep only the date and average price
# columns, and replace the old index with a fresh one.
date_price = regions_organic.get_group("TotalUS")[['Date', 'AveragePrice']].reset_index(drop=True)
# Plot average price over time. The figure is saved through the Axes that
# DataFrame.plot returns rather than through pyplot's implicit "current
# figure", so the right figure is written even if another figure has been
# created or the inline backend has already closed this one.
ax = date_price.plot(x='Date', y='AveragePrice', kind="line", figsize=(15, 10))
ax.get_figure().savefig('line_organic_avgp.png', bbox_inches='tight')
# Rename the columns to 'ds' (date) and 'y' (value) so they work with the
# fbprophet library.
date_price = date_price.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})

# Create and fit a model. The steps here follow the Prophet quick start page:
# https://facebook.github.io/prophet/docs/quick_start.html
m = Prophet()
m.fit(date_price)

# make_future_dataframe builds a dataframe that extends the requested number
# of periods into the future. By default it also includes the dates from the
# history, so the model fit is visible as well.
future = m.make_future_dataframe(periods=365)
# The last dates that will be forecast.
future.tail()

# predict assigns each row in `future` a predicted value named yhat; for
# historical dates this is an in-sample fit. The result is a new dataframe
# that also holds component and uncertainty-interval columns.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Plot the forecast and save that exact figure. Saving through the returned
# figure avoids depending on pyplot's implicit "current figure".
fig1 = m.plot(forecast)
fig1.savefig('forecast_organic.png', bbox_inches='tight')
Here we can see how the individual components of the model affect the predictions.
# plot_components shows the forecast components. By default these are the
# trend, yearly seasonality, and weekly seasonality of the time series; if
# holidays are included, they appear here too.
fig2 = m.plot_components(forecast)
# Save through the returned figure rather than pyplot's implicit "current
# figure", so the components plot is the one written to disk.
fig2.savefig('forecast_organic_components.png', bbox_inches='tight')
Now let's instead analyze conventional avocados in the TotalUS region.
# Take the conventional "TotalUS" group, keep only the date and average price
# columns, and replace the old index with a fresh one.
date_price = regions_conventional.get_group("TotalUS")[['Date', 'AveragePrice']].reset_index(drop=True)
# Plot average price over time and save via the Axes' own figure instead of
# pyplot's implicit "current figure", so the right figure is always written.
ax = date_price.plot(x='Date', y='AveragePrice', kind="line", figsize=(15, 10))
ax.get_figure().savefig('line_conventional_avgp.png', bbox_inches='tight')
# Rename the columns to 'ds' (date) and 'y' (value) so they work with the
# fbprophet library.
date_price = date_price.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})

# Create and fit a model. The steps here follow the Prophet quick start page:
# https://facebook.github.io/prophet/docs/quick_start.html
m = Prophet()
m.fit(date_price)

# make_future_dataframe builds a dataframe that extends the requested number
# of periods into the future. By default it also includes the dates from the
# history, so the model fit is visible as well.
future = m.make_future_dataframe(periods=365)
# The last dates that will be forecast.
future.tail()

# predict assigns each row in `future` a predicted value named yhat; for
# historical dates this is an in-sample fit. The result is a new dataframe
# that also holds component and uncertainty-interval columns.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Plot the forecast and save that exact figure. Saving through the returned
# figure avoids depending on pyplot's implicit "current figure".
fig1 = m.plot(forecast)
fig1.savefig('forecast_conventional.png', bbox_inches='tight')
# plot_components shows the forecast components. By default these are the
# trend, yearly seasonality, and weekly seasonality of the time series; if
# holidays are included, they appear here too.
fig2 = m.plot_components(forecast)
# Save through the returned figure rather than pyplot's implicit "current
# figure", so the components plot is the one written to disk.
fig2.savefig('forecast_conventional_components.png', bbox_inches='tight')